---
title: "Preparing data for study 3 experiment"
output: html_document
---

# 1. Basic system setup

```{r setup, include=FALSE}
knitr::opts_chunk$set(cache=FALSE)

# load libraries
library('tidyverse')
library('survey')
library('stargazer')
library('MASS')
library('knitr')
library('kableExtra')
library('psych')
library('nFactors')
library('moments')
library('ggeffects')
library('mediation')
library('ordinal')

# set ggplot theme
theme_set(theme_gray() +
            theme(panel.grid.major = element_blank(),
            panel.grid.minor = element_blank(),
            panel.background = element_rect(colour = "black",
                                            size = 1, fill = NA)))
```


# 2. Loading data: study 3 experiment
```{r}
load(file = "replication_dataset_study3_experiment.RData")
```

# 3. Check internal consistency for combined variables

## for grp_malleaby 

```{r}
alpha_grp_malleaby <- study3_exp %>%
                        dplyr::select(V16,V22,V26,V28) %>%
                        filter(complete.cases(.)) # check wether NA should be removed
psych::alpha(alpha_grp_malleaby)$total$std.alpha
```


To calculate omega (an alternative measure of internal consistency)

```{r}
MBESS::ci.reliability(data=alpha_grp_malleaby, type="omega", conf.level = 0.95,
interval.type="bca", B=1000)

```


To obstain the number of factors using factor analysis

### factor analysis

```{r}

n.factors <- 1

fit <- factanal(alpha_grp_malleaby, 
                n.factors,                # number of factors to extract
                scores=c("regression"),
                n.obs=nrow(alpha_grp_malleaby))

print(fit, digits=2, cutoff=.3, sort=TRUE)

```


```{r}
head(fit$scores)
```


```{r}
library('psych')
corMat <- cor(alpha_grp_malleaby)

(faPC  <- fa(r=corMat, nfactors=1, rotate="varimax", n.obs = nrow(alpha_grp_malleaby)))
```


```{r}

# Determine Number of Factors to Extract
library(nFactors)

ev <- eigen(cor(alpha_grp_malleaby)) # get eigenvalues
ap <- parallel(subject=nrow(alpha_grp_malleaby),
               var=ncol(alpha_grp_malleaby),
               rep=100,
               cent=.05)
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea)
plotnScree(nS)

```


## for ntl_homogty

```{r}
alpha_ntl_homogty <- study3_exp %>%
                      dplyr::select(V15,V34,V47,V50) %>%
                      filter(complete.cases(.))
psych::alpha(alpha_ntl_homogty)$total$std.alpha
```


To calculate omega (an alternative measure of internal consistency)

```{r}
MBESS::ci.reliability(data=alpha_ntl_homogty, type="omega", conf.level = 0.95,
interval.type="bca", B=1000)

```

## for stereotypes

```{r}
alpha_stereotypes <- study3_exp %>%
                      dplyr::select(V19:V20) %>%
                      filter(complete.cases(.))
psych::alpha(alpha_stereotypes)$total$std.alpha

```


To calculate omega (an alternative measure of internal consistency)

```{r}
MBESS::ci.reliability(data=alpha_stereotypes, type="omega", conf.level = 0.95,
interval.type="bca", B=1000)

```

## for contact_immigrt

```{r}
alpha_contact_immigrt <- study3_exp %>%
                          dplyr::select(V54:V55) %>%
                          filter(complete.cases(.))
psych::alpha(alpha_contact_immigrt)$total$std.alpha

```

To calculate omega (an alternative measure of internal consistency)
```{r}
MBESS::ci.reliability(data=alpha_contact_immigrt, type="omega", conf.level = 0.95,
interval.type="bca", B=1000)

```

# 4. Frequency distribution of attitudes towards migrant

Distribution of attitudes towards migrants before recoding
```{r}

ggplot(data = study3_exp, aes(x = V7)) +
  geom_histogram(binwidth = 0.5) +
  labs(x = "Attitudes towards migrants")

```

```{r}
skewness(study3_exp$V7)
```


# 5. Summary statistics ---------------------------------------------------

```{r}
# report
summary_stat_study3_exp <- as.data.frame(study3_exp)
summary_stat_study3_exp <- summary_stat_study3_exp %>% 
                            dplyr::select(version, grp_malleaby,
                                          V7, stereotypes, V42, V48,
                                          V8, V9, V11,
                                          contact_immigrt, V63, V59, V64, ntl_homogty)

# rename variables                                               
colnames(summary_stat_study3_exp) <- c("version",
                            "grp_malleaby",
                            "attitu_immigrt",
                            "stereotypes",
                            "social_distance",
                            "cultural_threat",
                            "economic_threat",
                            "satisfied_hk_econ",
                            "proud_HKer",
                            "contact_immigrt",
                            "household_income",
                            "sex",
                            "place_of_birth",
                            "ntl_homogty")

# this returns the summary stat for one variable by group version.
# think about how to apply it to all variables

mean_v1 <- lapply(summary_stat_study3_exp[summary_stat_study3_exp$version == 1,],
                  mean, na.rm=TRUE) %>% as.data.frame() %>% t()
mean_v2 <- lapply(summary_stat_study3_exp[summary_stat_study3_exp$version == 2,],
                  mean, na.rm=TRUE) %>% as.data.frame() %>% t()
sd_v1 <- lapply(summary_stat_study3_exp[summary_stat_study3_exp$version == 1,],
                  sd, na.rm=TRUE) %>% as.data.frame() %>% t()
sd_v2 <- lapply(summary_stat_study3_exp[summary_stat_study3_exp$version == 2,],
                  sd, na.rm=TRUE) %>% as.data.frame() %>% t()
min_v1 <- lapply(summary_stat_study3_exp[summary_stat_study3_exp$version == 1,],
                min, na.rm=TRUE) %>% as.data.frame() %>% t()
min_v2 <- lapply(summary_stat_study3_exp[summary_stat_study3_exp$version == 2,],
                min, na.rm=TRUE) %>% as.data.frame() %>% t()
max_v1 <- lapply(summary_stat_study3_exp[summary_stat_study3_exp$version == 1,],
                max, na.rm=TRUE) %>% as.data.frame() %>% t()
max_v2 <- lapply(summary_stat_study3_exp[summary_stat_study3_exp$version == 2,],
                max, na.rm=TRUE) %>% as.data.frame() %>% t()
obs_v1 <- lapply(summary_stat_study3_exp[summary_stat_study3_exp$version == 1,],
                 function(x) { length(which(!is.na(x)))} ) %>%
                    as.data.frame() %>% t()
obs_v2 <- lapply(summary_stat_study3_exp[summary_stat_study3_exp$version == 2,],
                 function(x) { length(which(!is.na(x)))} ) %>%
                    as.data.frame() %>% t()

tt_tvalue <- c()
tt_pvalue <- c()
tt_df <- c()
for (i in names(summary_stat_study3_exp)[2:length(summary_stat_study3_exp)]) {
  
  g1 <- summary_stat_study3_exp[i][summary_stat_study3_exp$version == 1,]
  g2 <- summary_stat_study3_exp[i][summary_stat_study3_exp$version == 2,]
  
  tt_temp <- t.test(g1, g2)
  
  tt_tvalue_temp <- as.data.frame(tt_temp$statistic)
  rownames(tt_tvalue_temp) <- i
  
  tt_pvalue_temp <- as.data.frame(tt_temp$p.value)
  rownames(tt_pvalue_temp) <- i
  
  tt_df_temp <- as.data.frame(length(na.omit(g1))+length(na.omit(g2))-2)
  rownames(tt_df_temp) <- i
  
  tt_tvalue <- rbind(tt_tvalue, tt_tvalue_temp)
  tt_pvalue <- rbind(tt_pvalue, tt_pvalue_temp)
  tt_df <- rbind(tt_df, tt_df_temp)
  
}
# rename columns
colnames(tt_tvalue) <- "t_value"
colnames(tt_pvalue) <- "p_value"
colnames(tt_df) <- "df"
# add first item for version
tt_tvalue <- rbind(data.frame(t_value = NA), tt_tvalue)
tt_pvalue <- rbind(data.frame(p_value = NA), tt_pvalue)
tt_df <- rbind(data.frame(df = NA), tt_df)
# add rowname for version
rownames(tt_tvalue)[1] <- rownames(tt_pvalue)[1] <- "version"


# create final table
stat_table <- data.frame(mean_v1 = mean_v1, sd_v1 = sd_v1,
                         min_v1 = min_v1, max_v1 = max_v1,
                         obs_v1 = obs_v1,
                         mean_v2 = mean_v2, sd_v2 = sd_v2,
                         min_v2 = min_v2, max_v2 = max_v2,
                         obs_v2 = obs_v2)
stat_table <- cbind(stat_table, tt_tvalue, tt_pvalue, tt_df)

rownames(stat_table) <- c("version",
                            "Perceived group malleability",
                            "Attitudes toward migrants",
                            "Stereotypes",
                            "Social distance",
                            "Cultural threat",
                            "Economic threat",
                            "Satisfied with Hong Kong's economy",
                            "Proud of ��HKer�� identity",
                            "Contact with migrants",
                            "Family income",
                            "Sex",
                            "Place of birth",
                            "Perceived national homogeneity")

stat_table <- stat_table[-1,]

```

## Table 8. Summary Statistics of the Experiment in Study 3

This table provides a simple t-test to check the group differences between version 1 and version 2. It contains the t values and p-values and is on the right hand side of the table.

```{r results='asis'}
# stargazer(stat_table, type = "text", summary = FALSE, digits = 2) ## alternative output format
kable(stat_table, format = "html", booktabs = TRUE, digits = 2) %>% 
  add_header_above(c(" ", "Heterogeneous Group" = 5, "Homogeneous Group" = 5,
                     "Difference in Means" = 3)) %>%
  kable_styling(bootstrap_options = c("hover", "condensed"),
                 full_width = F, latex_options = "scale_down") %>% 
  column_spec(12:14, bold = TRUE)
```

# 5. Regression analysis --------------------------------------------------

```{r}
# declare variables as ordered factor except some
df.test <- summary_stat_study3_exp %>% 
  mutate_all(as.numeric) %>% 
  mutate_at(vars(-attitu_immigrt, -ntl_homogty, -grp_malleaby, -stereotypes,
                 -satisfied_hk_econ, -proud_HKer, -contact_immigrt,
                 -version, -place_of_birth, -sex),
            # these variables are filtered out and leave it as numeric
            as.ordered) %>% 
  mutate_at(vars(attitu_immigrt, version, place_of_birth, sex), as.factor) # declare as factor

```


```{r}
m1.1_study3_exp <- lm(grp_malleaby ~
             version,
           data = df.test)

m1.2_study3_exp <- lm(grp_malleaby ~
                    version +
                    satisfied_hk_econ +
                    proud_HKer +
                    contact_immigrt +
                    household_income +
                    place_of_birth +
                    sex,
                  data = df.test)

m2_study3_exp <- polr(attitu_immigrt ~
                  version +
                  satisfied_hk_econ +
                  proud_HKer +
                  contact_immigrt +
                  household_income +
                  place_of_birth +
                  sex,
                  data = df.test,
                  method = "logistic",
                  Hess = TRUE)

m3_study3_exp <- lm(stereotypes ~
                    version +
                    satisfied_hk_econ +
                    proud_HKer +
                    contact_immigrt +
                    household_income +
                    place_of_birth +
                    sex,
                  data = df.test)

m4_study3_exp <- polr(social_distance ~
                  version +
                  satisfied_hk_econ +
                  proud_HKer +
                  contact_immigrt +
                  household_income +
                  place_of_birth +
                  sex,
                data = df.test,
                method = "logistic",
                Hess = TRUE)

m5_study3_exp <- polr(cultural_threat ~
                    version +
                    satisfied_hk_econ +
                    proud_HKer +
                    contact_immigrt +
                    household_income +
                    place_of_birth +
                    sex,
                  data = df.test,
                  method = "logistic",
                  Hess = TRUE)

m6_study3_exp <- glm(economic_threat ~
                    version +
                    satisfied_hk_econ +
                    proud_HKer +
                    contact_immigrt +
                    household_income +
                    place_of_birth +
                    sex,
                  data = df.test,
                  family=binomial(link='probit'))
```

## Table 9. Effects of the Treatment on Migration-Related Attitudes in Study 3

```{r results='asis', warning=FALSE}
stargazer(m2_study3_exp,m3_study3_exp, m4_study3_exp, m5_study3_exp, m6_study3_exp,
            type = "text",
          digits = 2,
            dep.var.labels = c("Attitudes towards migrants",
                            "Stereotypes",
                            "Social distance",
                            "Cultural threats",
                            "Economic treats"),
          covariate.labels = c("Treatment (homogeneous condition",
                               "Satisfied with Hong Kong economy",
                               "Proud of \"HKer\" identity",
                               "Contact with migrants",
                               "Family income: HK$10,001�V25,000",
                               "Family income: HK$25,000+",
                               "Place of birth",
                               "Sex (Female)"),
          notes = "Entries are coefficients from various types of regressions; standard errors are in parentheses.",
          notes.align = "l",
          column.sep.width = "1pt")

```


# 6 Predicted value for attitudes towards migrants --------------------------------------

```{r}
m2.1 <- glm(attitu_immigrt ~
                  version +
                  satisfied_hk_econ +
                  proud_HKer +
                  contact_immigrt +
                  household_income +
                  place_of_birth +
                  sex,
                  data = df.test,
           family=binomial(link='probit'))
```

```{r}
# use ggeffects to get the predicted probabilities across different values of "grp_malleaby"
m2.1_predict <- ggpredict(m2.1, terms = "version")

m2.1_predict
```

## Figure 3. Predicted levels of support toward migrants in Study 3.
The visualization.

```{r}
m2.1_predict$x <- c("Malleable", "Fixed")
m2.1_predict$x <- factor(m2.1_predict$x, levels = c("Malleable", "Fixed"))

ggplot(m2.1_predict, aes(x = as.factor(x), y = predicted)) +
  geom_point() +
  geom_errorbar(aes(ymax = conf.high, ymin = conf.low),
                width = 0.2) +
  labs(y = "Predicted probabilities of attitudes towards migrants",
       x = "Conditions") +
   theme(legend.position="none")
```

```{r}

```

